********************************************************************************
* Inflation Expectations Comparison, Durables
* Paper Figure 2
********************************************************************************
* ---- Wipe anything left over from a previous run ----
clear
clear matrix
estimates clear
graph drop _all
log close _all

* ---- Session-wide settings ----
set more off
set matsize 2500
set scheme s1color
graph set window fontface "Times New Roman"

* ---- Working directory (all later paths are relative to it) ----
cd "../Do"

* ---- Haver directory (disabled; enable and adjust if Haver is not already configured) ----
// set haverdir "Haver", perm

********************************************************************************
** HAVER PULL DATA
********************************************************************************
** Import monthly series from the Haver USECON database
* MB: the pull runs through October 2013 (previously only through October 2012).
* The series are monthly; an alternative would be tvar(yearq) with 4-quarter
* inflation instead of 12-month inflation.
import haver (pcuslfe pztexp jcxfebm pzrgus cinf1)@usecon, tvar(yearmo) fin(2008m9, 2013m10) clear

* Strip the "_usecon" suffix and give the series working names.
* pztexp keeps its name here; it is deflated by pce below.
rename (pcuslfe_usecon pztexp_usecon jcxfebm_usecon pzrgus_usecon cinf1_usecon) ///
       (cpiu           pztexp        pce           reggas        mich)

tsset yearmo

* 12-month inflation rate. MB: the denominator is L12.cpiu (it used to be cpiu).
generate inflation_yearly = ((cpiu - L12.cpiu) / L12.cpiu)*100
label variable inflation_yearly "Actual Inflation"

* Two versions of inflation over the NEXT 12 months: the lead of the backward-looking
* rate, and the same quantity computed directly from the price level.
generate inflation_yoy_forward = F12.inflation_yearly
generate inflation_yoy_forward2 = ((F12.cpiu-cpiu)/cpiu)*100

* WTI = West Texas Intermediate crude oil price, deflated by pce.
generate wti = pztexp/pce
label variable wti "WTI, Deflated by PCE"

* reggas is presumably the regular gasoline price (original author was unsure);
* reggas2 rescales it by 1/100.
generate reggas2 = reggas/100

* Gas price year-over-year percent change.
generate gas_infl=((reggas-L12.reggas)/L12.reggas)*100
label variable gas_infl "Gas Price Percent Change"

* Calendar year and calendar quarter derived from the monthly date.
generate year=yofd(dofm(yearmo))
generate yearq=qofd(dofm(yearmo))
format yearq %tq

* Keep the 2008m10-2013m11 window and stash the monthly file for the figure section.
keep if inrange(yearmo, tm(2008m10), tm(2013m11))
tempfile haverd
save `haverd'

********************************************************************************
** LOAD AND CLEAN DURABLES DATA
********************************************************************************

use ../Data/matched_data_durables_jun2018_baseline.dta, clear

* Three composite durables spending measures: nominal; "real" using a single
* (appliances) CPI; and "real" deflated good-by-good (done in the earlier merge file).
label variable durables "nominal durable goods spending"
label variable durables_real1 "real durable goods spending, single deflator"
label variable durables_real2 "real durable goods spending, separate deflators"

drop ethnicity

* Code 5 becomes 0 on each of these indicators.
recode mort stocks retacct howner (5=0)

* ---- Recode expectations variables from the inflation surveys ----

* Unemployment dummies from q6 (1 -> increase, 3 -> decrease).
recode q6 (1=1) (2 3 = 0), gen(unemp_increase)
recode q6 (3=1) (1 2 = 0), gen(unemp_decrease)

* 12-month conditions from q2a; anyone answering "other" stays in a separate category.
generate conditions_12m = q2a
recode q2a (1=1) (2 3 = 0), gen(conditions_12m_better)
recode q2a (2=1) (1 3 = 0), gen(conditions_12m_worse)

* 12-month interest rate direction from q7.
generate interestrate_12m = q7
recode interestrate_12m (1=1) (2 3 = 0), gen(intrate_12m_up)
recode interestrate_12m (3=1) (1 2 = 0), gen(intrate_12m_down)

* 12-month business conditions from q4; "other" again stays separate.
generate bconditions_12m = q4
recode q4 (1=1) (2 3 = 0), gen(bconditions_12m_better)
recode q4 (2=1) (1 3 = 0), gen(bconditions_12m_worse)

* ---- House price forecast ----
* q41 selects the sign (1 -> +q42, 2 -> -q42, 3 -> 0); any other q41 leaves it missing.
generate hppoint = cond(q41==3, 0, cond(q41==1, q42, cond(q41==2, -q42, .)))
* Some responses are extreme outliers (in the tens of thousands); blank them out.
replace hppoint = . if abs(hppoint)>=200

label variable hppoint "House price expectation"

*** Prepare some additional descriptives
* Race isn't reported in all periods, so compute each respondent's mean reported
* race and attach that single value to every one of the respondent's rows.
preserve
collapse (mean) race , by(prim_key)
sort prim_key
tempfile race
save `race'
restore

drop race
sort prim_key
merge m:1 prim_key using `race'
tab _merge
drop _merge
drop if prim_key==""

* Demographic dummies.
recode race (1=1) (nonmissing = 0), gen(white)
gen nonwhite = 1-white
recode gender (2=1) (1=0), gen(female)
* College dummy. The rule must be the RANGE 4/9: the previous "(4 9 = 0)" listed only
* the two values 4 and 9, so codes 5-8 were copied through unchanged and coll was
* not a 0/1 indicator. Missingness of coll is unaffected by this fix.
* NOTE(review): codes below 4, if any exist in the data, are still passed through
* unchanged, as before -- confirm against the codebook.
recode highesteducation (4/9 = 0) (10/16 = 1), gen(coll)

* q31s* hold the labor-force status codes, one variable per code (q31sK equals K when selected).
* MB: building an "employed" flag is odd since everyone in the sample is employed,
* although some respondents also report being retired.
generate employed = 1 if q31s1==1
replace employed = 0 if q31s1!=1 & (q31s2==2 | q31s3==3 | q31s4==4 | q31s5==5 | q31s6==6 | q31s7==7)

* Currently-retired flag, rebuilt from the status codes (replaces the shipped variable).
* Some respondents are retired even though all are employed; they may hold a small
* job, which would explain why they report wage growth expectations.
drop retired
generate retired = 1 if q31s5==5
replace retired = 0 if q31s5!=5 & (q31s1==1 | q31s2==2 | q31s3==3 | q31s4==4 | q31s6==6 | q31s7==7)
tab retired

* Gas price expectation (added December 2019 from Arman's old code):
* q47a selects the sign (1 -> +q47a_higher, 2 -> -q47a_lower, 3 -> 0).
generate gas_expect = cond(q47a==3, 0, cond(q47a==1, q47a_higher, cond(q47a==2, -q47a_lower, .)))

*MB resume: add "howner_fix" code ?

*replace howner=0 if howner==.
* Notes from the howner_fix work: it adds ten people to homeowner status; some with
* howner_fix==0 still have a positive mortgage payment but missing data on having a
* mortgage and/or the mortgage amount. Two observations have howner==0 and mort==1
* (not a homeowner but reporting a mortgage); left as-is for now.

* Drop respondents who say they have no mortgage yet report a mortgage amount above 100.
drop if amtmort>100 & amtmort!=. & mort==0

* Impute the mortgage indicator where it is missing, using the other information:
* first assign 0, then assign 1 to whatever is still missing (order matters).
replace mort = 0 if mort==. & (howner!=1 | mortgage==0)
replace mort = 1 if mort==. & (howner==1 | (mortgage>0 & mortgage!=.))

label variable mort "Has Mortgage"

* MB: quasi-continuous annual household income built from the midpoints of the
* categorical brackets (familyincome and, for the top bracket, familyincome_part2).
rename familyincome inc2
rename familyincome_part2 inc2_2
drop if inc2==.

* new_faminc is the income variable. Brackets 1-13 map to the midpoints listed
* below, in order (bracket k gets the k-th midpoint).
generate new_faminc = .
local midpoints 2500 6250 8750 11250 13750 17500 22500 27500 32500 37500 45000 55000 67500
local bracket = 0
foreach mid of local midpoints {
    local ++bracket
    replace new_faminc = `mid' if inc2==`bracket'
}

* Bracket 14 is split using inc2_2. One person has inc2==14 with inc2_2 missing
* (reason unknown); they are assigned the lowest sub-bracket of income over $75000.
replace new_faminc = 87500  if inc2==14 & inlist(inc2_2, 1, .)
replace new_faminc = 112500 if inc2==14 & inc2_2==2
replace new_faminc = 162500 if inc2==14 & inc2_2==3
replace new_faminc = 237500 if inc2==14 & inc2_2==4

generate log_new_faminc = log(new_faminc)

sort prim_key quarter
* Bring in the SAMPLE WEIGHTS for the quarterly data. Warning: observations can be
* lost if the set requiring weights has changed; that limits regressions on
* non-employed types to unweighted runs.
* weight_samp = weights designed for the regression sample; the full-sample weights
* remain available as weight_full.
* Only rows that match a weight record are kept (the original author reported 134
* observations dropped for lack of a weight_samp match).
merge m:1 prim_key using ../Data/qweights_pooled.dta, keep(match) nogenerate

*MB December 2019: key juncture: impose different sample restrictions and use different real/nominal durables
*********************
* Define the regression sample before recentering any variables: drop extreme values.
*
* Stata treats missing values as larger than any number, so an unguarded
* "drop if x>k" also deletes every row where x is missing. Each upper-tail filter
* below is therefore guarded with "x<." so that only genuine outliers are removed
* here (the mortgage filter was already guarded). Rows with missing values are
* removed explicitly by the missing-value drops later in the script.
* NOTE(review): rows with missing durables_real1, durables_real2, d_longinflmedian
* or payments were previously dropped here as a side effect and no longer are;
* confirm the resulting sample counts against the paper.
*
* The durables filter should drop the top 2 highest values of durables spending; it
* probably makes little difference.
*JIMIN: try turning on and off the following restrictions
drop if (durables>20000 & durables<.) | (durables_real1>20000 & durables_real1<.) | (durables_real2>20000 & durables_real2<.)
drop if prim_key=="5041140:1"
drop if mortgage>200000 & mortgage<.
drop if d_inflmedian>35 & d_inflmedian<.
drop if d_longinflmedian>35 & d_longinflmedian<.
drop if hppoint<-50
**end of optional restrictions 

* Locals for weights (can be switched on or off in regressions; not referenced in
* the visible part of this script).
local weights "[pweight=weight_samp]"
local weights_full "[pweight=weight_full]"
local pwfile "_pw"

* Variable labels for the regressors.
la var d_inflmedian "Inflation Expectation (SR)"
la var d_infliqr "Inflation Uncertainty (SR)"
la var d_longinflmedian "Inflation Expectation (MR)"
la var d_longinfliqr "Inflation Uncertainty (MR)"
la var lag_IE "Lagged Inflation Expectation (SR)"
la var lag_infl_iqr "Lagged Infl. Uncertainty (SR)"
la var hppoint "House price expectation"
la var gas_expect "Gas price expectation"

* Sum of monthly payments: mortgage + car for homeowners, rent + car otherwise.
* Interactions between this variable and IE may be included in some models.
gen payments=mortgage+car if howner==1
replace payments=rent+car if howner!=1
* Log of payments, with zero payments mapped to 0 rather than left missing.
gen log_payments=.
replace log_payments=log(payments) if payments>0
replace log_payments=0 if payments==0
la var log_payments "Fixed Mnthly Paymnts (Log)"
* Drop extreme payment values (110,000); only relevant when running a regression
* interacting with payments. Per the original note none are dropped here (unlike
* nondurables). Guarded so that missing payments are not dropped by this filter.
drop if payments>100000 & payments<.

* Define the sample: remove every observation with a (system) missing value in any
* of the variables below. Notes carried over from the original author:
*   - lag_IE / lag_infl_iqr are only needed if lagged IE enters the regression.
*   - new_faminc is the recode of the categorical familyincome and
*     familyincome_part2; the former income variable was earnings last month and
*     highly unreliable.
*   - hppoint costs 300+ observations; results are robust to not imposing it and
*     omitting hppoint from the regressions.
*   - howner drops 111 observations; the same robustness applies.
local required weight_samp d_inflmedian d_infliqr durables                  ///
               lag_IE lag_infl_iqr                                          ///
               new_faminc                                                   ///
               intrate_12m_up intrate_12m_down unemp_increase unemp_decrease ///
               rw_expect d_wageiqr                                          ///
               rage nonwhite female coll retired mort                       ///
               hppoint howner

foreach v of local required {
    drop if `v'==.
}

* Numeric household id: prim_key with the ":" characters removed.
destring prim_key, generate(id_new) ignore(":")

* Total durables spending within household. The number of observations of
* "durables" per household is identical to that of durables_real1 / durables_real2,
* so there is no need to repeat this for the alternate versions.
egen tot_durables = total(durables), by(id_new)
summarize tot_durables, detail
* Remove households that never purchased durables during the period of observation
* (reported zero spending throughout); the original author reported 142 observations.
drop if tot_durables==0

* Observations per household: sum a constant 1 within id_new.
generate pre_obs = 1
egen obs2 = total(pre_obs), by(id_new)
summarize obs2, detail

* Sample flag: households with at least 3 observations (all remaining households
* have nonzero total durables spending by construction).
generate durables_sample_hp = (obs2>=3)

* Calendar quarter in which the expectation was formed.
generate yearq = qofd(expectations_date)
format yearq %tq

* Weighted median and mean of the inflation expectation by expectations quarter,
* restricted to the flagged sample. The results are named *_dur directly.
*MB changes 8/12/2019--start changes here (look for "end changes here" below)
keep if durables_sample_hp==1
collapse (p50) d_inflmedian_dur = d_inflmedian (mean) d_infmean_dur = d_inflmedian [pweight=weight_samp], by(yearq)
tsset yearq
* Disabled: lagged median (gen L_lag_IE = l4.d_inflmedian; ren L_lag_IE L_lag_IE_dur)

drop if yearq<tq(2009q4)
tempfile dur
save `dur'

********************************************************************************
** CREATE FIGURE
********************************************************************************
* Quarterly averages of the monthly Haver series.
use `haverd', clear
collapse (mean) mich cpiu reggas inflation_yearly inflation_yoy_forward inflation_yoy_forward2 reggas2 gas_infl, by(yearq)
sort yearq
tsset yearq

* Despite the "qoq" names these are 4-quarter changes (L4. / F4.) of the quarterly averages.
generate inflation_qoq=((cpiu-L4.cpiu)/L4.cpiu)*100
generate inflation_qoq_forward=((F4.cpiu-cpiu)/cpiu)*100
generate gas_infl_qoq=((reggas-L4.reggas)/L4.reggas)*100

* Restrict to 2009q4-2012q4 only after the lags and leads have been computed.
keep if inrange(yearq, tq(2009q4), tq(2012q4))
tempfile haverd2
save `haverd2'

* Attach the Haver quarters to the survey medians; havermatch records the merge result.
use `dur', clear
merge 1:1 yearq using `haverd2', gen(havermatch)
sort yearq

** temp code to inspect data--forecast errors etc
*gen forecast_err=d_inflmedian_dur-inflation_yoy_forward2
*sum forecast_err, d
***

* Figure 2 Inflation Expectations Comparison (color version)
* Three connected series on a common axis: the survey median expectation, the
* one-year-forward realized inflation rate, and the Michigan survey series.
twoway ///
    (connected d_inflmedian_dur yearq, mcolor(black) msize(small) lcolor(black) yaxis(1)) ///
    (connected inflation_yoy_forward2 yearq, mcolor(green) msize(small) lcolor(green) yaxis(1)) ///
    (connected mich yearq, mcolor(red) msize(small) lcolor(red) yaxis(1)), ///
    ylabel(1.5(0.5)4.5, angle(horizontal) labsize(small) axis(1)) ///
    ytitle("Percent", axis(1) size(small)) ///
    tlabel(2009q4 "2009Q4" 2010q4 "2010Q4" 2011q4 "2011Q4" 2012q4 "2012Q4", angle(horizontal) labsize(small)) ///
    xtitle("") ///
    legend(region(lstyle(none)) rows(3) colfirst symx(*.4) size(small) ///
        order(1 "RAND{stMono:-}ALP Median Inflation Expectation (Durables Sample)" ///
              2 "One{stMono:-}Year Forward Actual Inflation Rate (CPI{stMono:-}U)" ///
              3 "Michigan Survey Median Inflation Expectation"))
* Disabled options, kept for reference:
*   title("Figure 2. RAND{stMono:-}ALP Inflation Expectations vs. Realized Inflation and Michigan Inflation Expectations", size(small))
*   note("Notes: Median inflation expectation refers to the median one{stMono:-}year{stMono:-}ahead inflation expectation for the given quarter,"
*        "based on the dates on which subjects completed the expectations surveys.", size(vsmall))

graph export ../Figures/figure_2.png, as(png) replace

* Monochrome version of Figure 2: same plot, but marker and line colors are left to
* the s1mono scheme instead of being set explicitly.
set scheme s1mono

twoway ///
    (connected d_inflmedian_dur yearq, msize(small) yaxis(1)) ///
    (connected inflation_yoy_forward2 yearq, msize(small) yaxis(1)) ///
    (connected mich yearq, msize(small) yaxis(1)), ///
    ylabel(1.5(0.5)4.5, angle(horizontal) labsize(small) axis(1)) ///
    ytitle("Percent", axis(1) size(small)) ///
    tlabel(2009q4 "2009Q4" 2010q4 "2010Q4" 2011q4 "2011Q4" 2012q4 "2012Q4", angle(horizontal) labsize(small)) ///
    xtitle("") ///
    legend(region(lstyle(none)) rows(3) colfirst symx(*.4) size(small) ///
        order(1 "RAND{stMono:-}ALP Median Inflation Expectation (Durables Sample)" ///
              2 "One{stMono:-}Year Forward Actual Inflation Rate (CPI{stMono:-}U)" ///
              3 "Michigan Survey Median Inflation Expectation"))
* Disabled options, kept for reference:
*   title("Figure 2. RAND{stMono:-}ALP Inflation Expectations vs. Realized Inflation and Michigan Inflation Expectations", size(small))
*   note("Notes: Median inflation expectation refers to the median one{stMono:-}year{stMono:-}ahead inflation expectation for the given quarter,"
*        "based on the dates on which subjects completed the expectations surveys.", size(vsmall))

graph export ../Figures/figure_2_bw.png, as(png) replace
